rm(list = ls())
options(warn=-1)
#-------------------------------------------------------------------------------------
setwd('~/Dropbox/Research/compliance_blocking/replication/GOTV')
library(logr)
# Open log
lf <- log_open("05_variable_importance.log")
load('Data/generated/gotv_cleaned.Rdata')
#-------------------------------------------------------------------------------------
covariates = c("age", "turf", "voted99", "voted00", "missing99", "famsize", "primary",
    "missing_primary", "age18", "age35", "age50", "age65", "age2")

df_all = df
cities = unique(df_all$city)
#-------------------------------------------------------------------------------------
#Select covariates for compliance blocking
library(ranger)
library(tidyverse)
#-------------------------------------------------------------------------------------
log_print(sessionInfo())
#-------------------------------------------------------------------------------------
set.seed(1234)
compliance_models<-list()
for(city in cities){
    df_subset = df_all[which(df_all$city == city),]
    X = df_subset[which(df_subset$T==1),] %>%
                dplyr::select(-c(T, C, Y, age18, age35, age50, age65, missing_primary, missing99))
    if(!(city %in% c("Bridgeport", "Raleigh"))){
        X<-X %>% dplyr::select(-c(party))
    }
    if(city != "Raleigh"){
        X<-X %>% dplyr::select(-c("race"))
    }
    if(city=='Bridgeport'){
        X<-X %>% dplyr::select(-c(voted99))
    }
    if(city=='Raleigh'){
        X<-X %>% dplyr::select(-c(voted00))
    }
    if(city=="Columbus"){
        X<-X %>% dplyr::select(-c(age))
    }
    C = as.factor(df_subset$C[which(df_subset$T==1)])
    model_data = data.frame(C, X)
    model_compliance = ranger(C~., data = model_data, importance='permutation')
    compliance_models[[city]]<-model_compliance
}

outcome_models<-list()
for(city in cities){
    df_subset = df_all[which(df_all$city == city),]
    X = df_subset[which(df_subset$T==0),] %>%
                dplyr::select(-c(T, C, Y, age18, age35, age50, age65, missing_primary, missing99))
    if(!(city %in% c("Bridgeport", "Raleigh"))){
        X<-X %>% dplyr::select(-c(party))
    }
    if(city != "Raleigh"){
        X<-X %>% dplyr::select(-c("race"))
    }
    if(city=='Bridgeport'){
        X<-X %>% dplyr::select(-c(voted99))
    }
    if(city=='Raleigh'){
        X<-X %>% dplyr::select(-c(voted00))
    }
    if(city=="Columbus"){
        X<-X %>% dplyr::select(-c(age))
    }
    Y = as.factor(df_subset$Y[which(df_subset$T==0)] )
    model_data = data.frame(Y, X)
    model_outcomes = ranger(Y~., data=model_data, importance='permutation')
    outcome_models[[city]]<-model_outcomes
}
#-------------------------------------------------------------------------------------
df_varImp = c()
for(i in 1:6){
    df_varImp<-rbind(
        df_varImp,
        data.frame(site = cities[i],
                    covariates = names(outcome_models[[i]]$variable.importance),
                    varImp = outcome_models[[i]]$variable.importance,
                    measure = "Outcome"),
        data.frame(site = cities[i],
                    covariates = names(compliance_models[[i]]$variable.importance),
                    varImp = compliance_models[[i]]$variable.importance,
                    measure = "Compliance")
    )
}
#-------------------------------------------------------------------------------------
df_varImp$covariates<-paste(df_varImp$covariates)
df_varImp$covariates[which(df_varImp$covariates=="voted99")]<-"Voted in '99"
df_varImp$covariates[which(df_varImp$covariates=="voted00")]<-"Voted in '00"
df_varImp$covariates[which(df_varImp$covariates=="turf")]<-"Turf"
df_varImp$covariates[which(df_varImp$covariates=="primary")]<-"Voted in Prim."
df_varImp$covariates[which(df_varImp$covariates=="famsize")]<-"Family Size"
df_varImp$covariates[which(df_varImp$covariates=="age")]<-"Age"
df_varImp$covariates[which(df_varImp$covariates=="age2")]<-"Age^2"
df_varImp$covariates[which(df_varImp$covariates=="race")]<-"Race"
df_varImp$covariates[which(df_varImp$covariates=="party")]<-"Party"
df_varImp<-df_varImp[-which(df_varImp$covariates=='city'),]
#-------------------------------------------------------------------------------------
#FIGURE A-4.3:
df_varImp[df_varImp$measure=="Outcome",] %>% ggplot(aes(x = covariates, y = abs(varImp)))+
geom_bar(stat='identity', width=0.5, fill='skyblue3') + facet_wrap(~site) + coord_flip() +
xlab("Covariate") + ylab("Increase in the Prediction Error from Permuting")+theme(legend.position='none')+
ggtitle("Outcome")+geom_hline(yintercept = 0.01, alpha=0.9)+
theme_bw() + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
                             axis.text=element_text(size=14), #size = 12
                             #legend.text=element_text(size=7),
                             legend.position='bottom', axis.title = element_text(size = 14),
                             strip.text.x = element_text(size = 12, face='bold'),
                             strip.text.y = element_text(size = 12, face='bold'),
                             plot.subtitle=element_text(size = 14, hjust=0.5),
                             axis.text.x = element_text(angle=90, vjust = 0.5, hjust=1))
ggsave("../Figures/figA4_3_1_varImp_outcome.pdf", height=6.5, width=10)

df_varImp[df_varImp$measure=="Compliance",] %>% ggplot(aes(x = covariates, y = abs(varImp)))+
geom_bar(stat='identity', width=0.5, fill='salmon') + facet_wrap(~site) + coord_flip() +
xlab("Covariate") + ylab("Increase in the Prediction Error from Permuting")+theme(legend.position='none')+
ggtitle("Compliance")+geom_hline(yintercept = 0.01, alpha=0.9)+
theme_bw() + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
                             axis.text=element_text(size=14), #size = 12
                             #legend.text=element_text(size=7),
                             legend.position='bottom', axis.title = element_text(size = 14),
                             strip.text.x = element_text(size = 12, face='bold'),
                             strip.text.y = element_text(size = 12, face='bold'),
                             plot.subtitle=element_text(size = 14, hjust=0.5),
                             axis.text.x = element_text(angle=90, vjust = 0.5, hjust=1))
ggsave("../Figures/figA4_3_2_varImp_compliance.pdf", height=6.5, width=10)

log_close()
